# Packages used
library("twitteR")
## Warning: package 'twitteR' was built under R version 4.2.2
library(tm)
## Warning: package 'tm' was built under R version 4.2.2
## Loading required package: NLP
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:twitteR':
##
## id, location
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("plotly")
## Warning: package 'plotly' was built under R version 4.2.2
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
##
## annotate
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
library(RColorBrewer)
library(tidytext)
## Warning: package 'tidytext' was built under R version 4.2.2
library(stringr)
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.2.2
library(rtweet)
## Warning: package 'rtweet' was built under R version 4.2.2
##
## Attaching package: 'rtweet'
## The following object is masked from 'package:twitteR':
##
## lookup_statuses
library(corpus)
## Warning: package 'corpus' was built under R version 4.2.2
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
##
## extract
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 4.2.2
library(wordcloud2)
## Warning: package 'wordcloud2' was built under R version 4.2.2
library(syuzhet)
## Warning: package 'syuzhet' was built under R version 4.2.2
##
## Attaching package: 'syuzhet'
## The following object is masked from 'package:rtweet':
##
## get_tokens
# Set the directory where the tweet data is saved and loaded.
# NOTE(review): a machine-specific absolute path makes the script fail on any
# other computer. The location can be overridden with the TWEET_DATA_DIR
# environment variable; the original path is kept as the default so existing
# behaviour is unchanged when the variable is not set.
data_dir <- Sys.getenv(
  "TWEET_DATA_DIR",
  unset = "C:/CS101_DATA_SCIENCE/Nalaza_Repo/Individual Project/Individual Project 2"
)
if (!dir.exists(data_dir)) {
  stop(
    "Data directory not found: ", data_dir,
    ". Set TWEET_DATA_DIR to an existing folder.",
    call. = FALSE
  )
}
setwd(data_dir)
# Authenticate against the Twitter API using your developer credentials.
#
# SECURITY: credentials must never be stored in source code. The keys that were
# previously hard-coded here should be treated as compromised and revoked in
# the Twitter developer portal. Define the four environment variables below
# (for example in ~/.Renviron) before running the script.

# Read one required credential from the environment; stop with a clear message
# when it is missing or empty.
read_credential <- function(name) {
  value <- Sys.getenv(name, unset = "")
  if (!nzchar(value)) {
    stop("Environment variable ", name, " is not set.", call. = FALSE)
  }
  value
}

CONSUMER_KEY <- read_credential("TWITTER_CONSUMER_KEY")
CONSUMER_SECRET <- read_credential("TWITTER_CONSUMER_SECRET")
ACCESS_TOKEN <- read_credential("TWITTER_ACCESS_TOKEN")
ACCESS_SECRET <- read_credential("TWITTER_ACCESS_SECRET")

setup_twitter_oauth(
  consumer_key = CONSUMER_KEY,
  consumer_secret = CONSUMER_SECRET,
  access_token = ACCESS_TOKEN,
  access_secret = ACCESS_SECRET
)
## [1] "Using direct authentication"
# Get up to 10000 English tweets (retweets included) tagged #TheGameAwards,
# convert them to a data frame and cache the data frame on disk.
# The string literals previously used typographic quotes, which R cannot parse;
# they are plain double quotes now.
tweets_file <- "TrendTweetsDF.Rdata"

# Query the API only when no cached copy exists, so that re-running the script
# reuses the saved data instead of overwriting it.
# NOTE(review): trendTweets is therefore only defined on a fresh download.
if (!file.exists(tweets_file)) {
  trendTweets <- searchTwitter(
    "#TheGameAwards",
    n = 10000,
    lang = "en",
    since = "2022-12-01",
    until = "2022-12-08",
    retryOnRateLimit = 120
  )
  # Convert the list of status objects into a data frame.
  TrendTweetsDF <- twListToDF(trendTweets)
  # Save the data frame for later runs.
  save(TrendTweetsDF, file = tweets_file)
}

# Use the existing (cached) data.
load(file = tweets_file)
# Subset the retweets and the original tweets into separate data frames.
# Plot the retweets and the original tweets using a vertical bar graph and
# include legends.

# Original tweets ----
# Keep only rows that are not retweets. The flag is negated directly instead of
# being compared with the string "FALSE".
# NOTE(review): assumes isRetweet is a logical column, as twListToDF() is
# expected to produce -- confirm.
Original <- subset(
  TrendTweetsDF,
  !isRetweet,
  select = c(text, screenName, created, isRetweet)
)

# Time span covered by the original tweets. The former group_by(1) only added a
# constant column named `1` to the printed result, so it was dropped.
# Recorded run: 2022-12-07 17:56:49 (min) to 2022-12-07 23:59:36 (max).
Original %>%
  summarise(max = max(created), min = min(created))

# Add the creation time rounded to hours, stored as POSIXct.
Tweets <- Original %>%
  mutate(Created_At_Round = as.POSIXct(round(created, units = "hours")))

# Earliest and latest creation times of the original tweets.
Minimum <- min(Tweets$created)
Minimum
## [1] "2022-12-07 17:56:49 UTC"
Maximum <- max(Tweets$created)
Maximum
## [1] "2022-12-07 23:59:36 UTC"
# Plot of the original tweets: histogram of creation times, with each bar
# filled according to its count and the colour legend shown on the right.
Original_tweets <- ggplot(Tweets, aes(x = created)) +
  # after_stat(count) replaces the deprecated `..count..` notation.
  # bins = 30 spells out the value stat_bin() was already choosing by default,
  # which also silences its "Pick better value" message.
  geom_histogram(aes(fill = after_stat(count)), bins = 30) +
  theme(legend.position = "right") +
  xlab("Time") +
  ylab("Number of tweets") +
  scale_fill_gradient(low = "midnightblue", high = "purple") +
  labs(title = "The Original Tweets", subtitle = "December 7,2022")

# Render the static ggplot as an interactive plotly widget.
ggplotly(Original_tweets)
# The retweets ----
# Keep only rows flagged as retweets. The flag is used directly instead of
# being compared with the string "TRUE".
# NOTE(review): assumes isRetweet is a logical column, as twListToDF() is
# expected to produce -- confirm.
Retweets <- subset(
  TrendTweetsDF,
  isRetweet,
  select = c(text, screenName, created, isRetweet)
)

# Time span covered by the retweets. The former group_by(1) only added a
# constant column named `1` to the printed result, so it was dropped.
# Recorded run: 2022-12-07 17:55:49 (min) to 2022-12-07 23:59:53 (max).
Retweets %>%
  summarise(max = max(created), min = min(created))

# Add the creation time rounded to hours, stored as POSIXct.
Retweets_1 <- Retweets %>%
  mutate(Created_At_Round = as.POSIXct(round(created, units = "hours")))

# Earliest and latest creation times of the retweets.
mini <- min(Retweets_1$created)
mini
## [1] "2022-12-07 17:55:49 UTC"
maxi <- max(Retweets_1$created)
maxi
## [1] "2022-12-07 23:59:53 UTC"
# Plot of the retweets: histogram of creation times, with each bar filled
# according to its count and the colour legend shown on the right.
Retweets_2 <- ggplot(Retweets_1, aes(x = created)) +
  # after_stat(count) replaces the deprecated `..count..` notation.
  # bins = 30 spells out the value stat_bin() was already choosing by default,
  # which also silences its "Pick better value" message.
  geom_histogram(aes(fill = after_stat(count)), bins = 30) +
  theme(legend.position = "right") +
  xlab("Time") +
  ylab("Number of Retweets") +
  scale_fill_gradient(low = "midnightblue", high = "skyblue") +
  labs(title = "The Retweets", subtitle = "December 7,2022")

# Render the static ggplot as an interactive plotly widget.
ggplotly(Retweets_2)